home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Language/OS - Multiplatform Resource Library
/
LANGUAGE OS.iso
/
prolog
/
sbprolog
/
v3
/
sim.lha
/
sim
/
builtin
/
token.c
< prev
Wrap
C/C++ Source or Header
|
1991-05-21
|
33KB
|
883 lines
/* File : Token.c
Author : Richard A. O'Keefe
Modified by : Deeporn H. Beardsley & Saumya Debray
Updated : Summer 1988
Purpose : Tokenizer for SB-Prolog.
*/
#ifdef vms
#include stdio
#else
#include <stdio.h>
#endif
/* stuff defined to interface with SB-Prolog */
#include "builtin.h"
#include <errno.h>
/* We used to use an 8-bit character set under VMS, but 7-bit ASCII
* elsewhere. Now that DIS 8859/1 exists (a draft international
* standard for an 8-bit extension of ASCII) we use that, and we are
* in luck: it is almost identical to the VMS character set.
*/
#define AlphabetSize 256
#define SBPMAXINT 268435455
extern char *strcpy(/* CHAR_PTR, CHAR_PTR */);
#define StrCpy(dst, src) (void)strcpy(dst, src)
#define Printf (void)printf
#define Sprintf (void)sprintf
#define Fprintf (void)fprintf
#define InRange(X,L,U) ((unsigned)((X)-(L)) <= (unsigned)((U)-(L)))
#define IsLayout(X) InRange(InType(X), SPACE, EOLN)
/* VERY IMPORTANT NOTE: I assume that the stdio library returns the value
* EOF when character input hits the end of the file, and that this value
* is actually the integer -1. You will note the DigVal(), InType(), and
* OuType() macros below, and there is a ChType() macro used in crack().
* They all depend on this assumption.
*/
#define DIGIT 0 /* 0 .. 9 */
#define BREAK 1 /* _ */
#define UPPER 2 /* A .. Z */
#define LOWER 3 /* a .. z */
#define SIGN 4 /* -/+*<=>#@$\^&~`:.? */
#define NOBLE 5 /* !; (don't form compounds) */
#define PUNCT 6 /* (),[]|{}% */
#define ATMQT 7 /* ' (atom quote) */
#define LISQT 8 /* " (list quote) */
#define STRQT 9 /* $ (string quote) */
#define CHRQT 10 /* ` (character quote, maybe) */
#define TILDE 11 /* ~ (like character quote but buggy) */
#define SPACE 12 /* layout and control chars */
#define EOLN 13 /* line terminators ^J ^L */
#define REALO 14 /* floating point number */
#define EOFCH 15 /* end of file */
#define ALPHA DIGIT /* any of digit, break, upper, lower */
#define BEGIN BREAK /* atom left-paren pair */
#define ENDCL EOLN /* end of clause token */
#define RREAL 16 /* radix number(real) - overflowed */
#define RDIGIT 17 /* radix number(int) */
#define InType(c) (intab.chtype+1)[c]
#define DigVal(c) (digval+1)[c]
BYTE outqt[EOFCH+1];
struct CHARS
{
int eolcom; /* End-of-line comment, default % */
int endeol; /* early terminator of eolcoms, default none */
int begcom; /* In-line comment start, default / */
int astcom; /* In-line comment second, default * */
int endcom; /* In-line comment finish, default / */
int radix; /* Radix character, default ' */
int dpoint; /* Decimal point, default . */
int escape; /* String escape character, default \ */
int termin; /* Terminates a clause */
CHAR chtype[AlphabetSize+1];
};
struct CHARS intab = /* Special character table */
{
'%', /* eolcom: end of line comments */
-1, /* endeol: early end for eolcoms */
'/', /* begcom: in-line comments */
'*', /* astcom: in-line comments */
'/', /* endcom: in-line comments */
'\'', /* radix : radix separator */
'.', /* dpoint: decimal point */
-1, /* escape: string escape character */
'.', /* termin: ends clause, sign or solo */
{
EOFCH, /* really the -1th element of the table: */
/* ^@ ^A ^B ^C ^D ^E ^F ^G */
SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE,
/* ^H ^I ^J ^K ^L ^M ^N ^O */
SPACE, SPACE, EOLN, SPACE, EOLN, SPACE, SPACE, SPACE,
/* ^P ^Q ^R ^S ^T ^U ^V ^W */
SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE,
/* ^X ^Y ^Z ^[ ^\ ^] ^^ ^_ */
SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE,
/* sp ! " # $ % & ' */
SPACE, NOBLE, LISQT, SIGN, LOWER, PUNCT, SIGN, ATMQT,
/* ( ) * + , - . / */
PUNCT, PUNCT, SIGN, SIGN, PUNCT, SIGN, SIGN, SIGN,
/* 0 1 2 3 4 5 6 7 */
DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT,
/* 8 9 : ; < = > ? */
DIGIT, DIGIT, SIGN, PUNCT, SIGN, SIGN, SIGN, SIGN,
/* @ A B C D E F G */
SIGN, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER,
/* H I J K L M N O */
UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER,
/* P Q R S T U V W */
UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER,
/* X Y Z [ \ ] ^ _ */
UPPER, UPPER, UPPER, PUNCT, SIGN, PUNCT, SIGN, BREAK,
/* ` a b c d e f g */
SIGN, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER,
/* h i j k l m n o */
LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER,
/* p q r s t u v w */
LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER,
/* x y z { | } ~ ^? */
LOWER, LOWER, LOWER, PUNCT, PUNCT, PUNCT, SIGN, SPACE,
/* 128 129 130 131 132 133 134 135 */
SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE,
/* 136 137 138 139 140 141 142 143 */
SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE,
/* 144 145 146 147 148 149 150 151 */
SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE,
/* 152 153 154 155 156 157 158 159 */
SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE,
/* NBSP !-inv cents pounds ching yen brobar section */
SPACE, SIGN, SIGN, SIGN, SIGN, SIGN, SIGN, SIGN,
/* "accent copyr -a ord << nothook SHY (reg) ovbar */
SIGN, SIGN, LOWER, SIGN, SIGN, SIGN, SIGN, SIGN,
/* degrees +/- super 2 super 3 - micron pilcrow - */
SIGN, SIGN, LOWER, LOWER, SIGN, SIGN, SIGN, SIGN,
/* , super 1 -o ord >> 1/4 1/2 3/4 ?-inv */
SIGN, LOWER, LOWER, SIGN, SIGN, SIGN, SIGN, SIGN,
/* `A 'A ^A ~A "A oA AE ,C */
UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER,
/* `E 'E ^E "E `I 'I ^I "I */
UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER,
/* ETH ~N `O 'O ^O ~O "O x times */
#ifdef vms
UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER,
#else
UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, SIGN,
#endif
/* /O `U 'U ^U "U 'Y THORN ,B */
UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, LOWER,
/* `a 'a ^a ~a "a oa ae ,c */
LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER,
/* `e 'e ^e "e `i 'i ^i "i */
LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER,
/* eth ~n `o 'o ^o ~o "o -:- */
#ifdef vms
LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER,
#else
LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, SIGN,
#endif
/* /o `u 'u ^u "u 'y thorn "y */
#ifdef vms
LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, SPACE
#else
LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER
#endif
}
};
CHAR digval[AlphabetSize+1] =
{
99, /* really the -1th element of the table */
/* ^@ ^A ^B ^C ^D ^E ^F ^G */
99, 99, 99, 99, 99, 99, 99, 99,
/* ^H ^I ^J ^K ^L ^M ^N ^O */
99, 99, 99, 99, 99, 99, 99, 99,
/* ^P ^Q ^R ^S ^T ^U ^V ^W */
99, 99, 99, 99, 99, 99, 99, 99,
/* ^X ^Y ^Z ^[ ^\ ^] ^^ ^_ */
99, 99, 99, 99, 99, 99, 99, 99,
/* sp ! " # $ % & ' */
99, 99, 99, 99, 99, 99, 99, 99,
/* ( ) * + , - . / */
99, 99, 99, 99, 99, 99, 99, 99,
/* 0 1 2 3 4 5 6 7 */
0, 1, 2, 3, 4, 5, 6, 7,
/* 8 9 : ; < = > ? */
8, 9, 99, 99, 99, 99, 99, 99,
/* @ A B C D E F G */
99, 10, 11, 12, 13, 14, 15, 99,
/* H I J K L M N O */
99, 99, 99, 99, 99, 99, 99, 99,
/* P Q R S T U V W */
99, 99, 99, 99, 99, 99, 99, 99,
/* X Y Z [ \ ] ^ _ */
99, 99, 99, 99, 99, 99, 99, 0, /*NB*/
/* ` a b c d e f g */
99, 10, 11, 12, 13, 14, 15, 99,
/* h i j k l m n o */
99, 99, 99, 99, 99, 99, 99, 99,
/* p q r s t u v w */
99, 99, 99, 99, 99, 99, 99, 99,
/* x y z { | } ~ ^? */
99, 99, 99, 99, 99, 99, 99, 99,
/* 128 129 130 131 132 133 134 135 */
99, 99, 99, 99, 99, 99, 99, 99,
/* 136 137 138 139 140 141 142 143 */
99, 99, 99, 99, 99, 99, 99, 99,
/* 144 145 146 147 148 149 150 151 */
99, 99, 99, 99, 99, 99, 99, 99,
/* 152 153 154 155 156 157 158 159 */
99, 99, 99, 99, 99, 99, 99, 99,
/* 160 161 162 163 164 165 166 167 */
99, 99, 99, 99, 99, 99, 99, 99,
/* 168 169 170(-a) 171 172 173 174 175 */
99, 99, 99, 99, 99, 99, 99, 99,
/* 176 177 178(2) 179(3) 180 181 182 183 */
99, 99, 2, 3, 99, 99, 99, 99,
/* 184 185(1) 186(-o) 187 188 189 190 191 */
99, 1, 99, 99, 99, 99, 99, 99,
/* 192 193 194 195 196 197 198 199 */
99, 99, 99, 99, 99, 99, 99, 99,
/* 200 201 202 203 204 205 206 207 */
99, 99, 99, 99, 99, 99, 99, 99,
/* 208 209 210 211 212 213 214 215 */
99, 99, 99, 99, 99, 99, 99, 99,
/* 216 217 218 219 220 221 222 223 */
99, 99, 99, 99, 99, 99, 99, 99,
/* 224 225 226 227 228 229 230 231 */
99, 99, 99, 99, 99, 99, 99, 99,
/* 232 233 234 235 236 237 238 239 */
99, 99, 99, 99, 99, 99, 99, 99,
/* 240 241 242 243 244 245 246 247 */
99, 99, 99, 99, 99, 99, 99, 99,
/* 248 249 250 251 252 253 254 255 */
99, 999, 99, 99, 99, 99, 99, 99
};
/* values returned to calling program */
#define SPECIAL 0 /* puncuation , ( ) [ ] ... */
#define VARO 1 /* type is a variable */
#define FUNC 2 /* type is atom( */
#define NUMBERO 3 /* type is a number */
#define ATOMO 4 /* type is an atom */
#define ENDCLS 5 /* END of clause but not file */
#define USCORE 6 /* underscore '_' */
#define SEMI 7 /* ; */
#define BADEND 8 /* END of file, not end of clause */
#define STRING 9 /* type is a char string */
int cNUMERO = 0, cATOMO = 0, cFUNC = 0, cVARO = 0, cUSCORE = 0,
cSTRING = 0, cSPECIAL = 0, cSEMI = 0, cENDCLS = 0, cENDPRG = 0;
extern LONG_PTR insert();
static BYTE perm = PERM;
extern FILE *curr_in, *curr_out; /* current input, output streams */
void SyntaxError(message)
CHAR_PTR message;
{
Fprintf(stderr, "Syntax error: %s\n", message);
exit(1);
}
/* GetToken() reads a single token from the input stream and returns
* its type, which is one of
* DIGIT -- a number
* BEGIN -- an atom( pair
* LOWER -- an atom
* UPPER -- a variable
* PUNCT -- a single punctuation mark
* LISQT -- a quoted list of character codes
* STRQT -- a quoted string
* ENDCL -- end of clause (normally '.\n').
* EOFCH -- signifies end-of-file.
* RREAL -- a real, from some radix notation, in double_v.
* RDIGIT -- an integer, from some radix notation, in rad_int.
* In all cases except the last, the text of the token is in AtomStr.
* There are two questions: between which pairs of adjacent tokens is
* a space (a) necessary, (b) desirable? There is an additional
* dummy token type used by the output routines, namely
* NOBLE -- extra space is definitely not needed.
* I leave it as an exercise for the reader to answer question (a).
* Since this program is to produce output I find palatable (even if
* it isn't exactly what I'd write myself), extra spaces are ok. In
* fact, the main use of this program is as an editor command, so it
* is normal to do a bit of manual post-processing. Question (b) is
* the one to worry about then. My answer is that a space is never
* written
* - after PUNCT ( [ { |
* - before PUNCT ) ] } | , <ENDCL>
* is written after comma only sometimes, and is otherwise always
* written. The variable lastput thus takes these values:
* ALPHA -- put a space except before PUNCT
* SIGN -- as alpha, but different so ENDCL knows to put a space.
* NOBLE -- don't put a space
* ENDCL -- just ended a clause
* EOFCH -- at beginning of file
*/
int lastc = ' '; /* previous character */
#define MaxStrLen 1000
BYTE AtomStr[MaxStrLen+20];
LONG list_p;
int rtnint;
double double_v;
LONG rad_int;
CHAR tok2long[] = "token too long";
CHAR eofinrem[] = "end of file in comment";
CHAR badexpt[] = "bad exponent";
CHAR badradix[] = "radix > 36";
/* read_character(FILE* card, BYTE q)
* reads one character from a quoted atom, list, string, or character.
* Doubled quotes are read as single characters, otherwise a
* quote is returned as -1 and lastc is set to the next character.
* If the input syntax has character escapes, they are processed.
* Note that many more character escape sequences are accepted than
* are generated. There is a divergence from C: \xhh sequences are
* two hexadecimal digits long, not three.
* Note that the \c and \<space> sequences combine to make a pretty
* way of continuing strings. Do it like this:
* "This is a string, which \c
* \ has to be continued over \c
* \ several lines.\n".
*/
int read_character(card, q)
register FILE *card;
register int q;
{
register int c;
c = getc(card);
BACK:
if (c < 0) {
DOERR:
if (q < 0)
SyntaxError("end of file in character constant");
else {
CHAR message[80];
Sprintf(message, "end of file in %cquoted%c constant", q, q);
SyntaxError(message);
}
}
if (c == q) {
c = getc(card);
if (c == q)
return c;
lastc = c;
return -1;
} else if (c != intab.escape)
return c;
/* If we get here, we have read the "\" of an escape sequence */
c = getc(card);
switch (c) {
case EOF:
clearerr(curr_in);
goto DOERR;
case 'n': case 'N': /* newline */
return 10;
case 't': case 'T': /* tab */
return 9;
case 'r': case 'R': /* reeturn */
return 13;
case 'v': case 'V': /* vertical tab */
return 11;
case 'b': case 'B': /* backspace */
return 8;
case 'f': case 'F': /* formfeed */
return 12;
case 'e': case 'E': /* escape */
return 27;
case 'd': case 'D': /* delete */
return 127;
case 's': case 'S': /* space */
return 32;
case 'a': case 'A': /* alarm */
return 7;
case '^': /* control */
c = getc(card);
if (c < 0)
goto DOERR;
return (c == '?' ? 127 : c&31);
case 'c': case 'C': /* continuation */
while (IsLayout(c = getc(card)))
;
goto BACK;
case 'x': case 'X': /* hexadecimal */
{ int i, n;
for (n = 0, i = 2; --i >= 0; n = (n<<4) + DigVal(c))
if (DigVal(c = getc(card)) >= 16) {
if (c < 0)
goto DOERR;
(void)ungetc(c, card);
break;
}
return (n & 255);
}
case 'o': case 'O': /* octal */
c = getc(card);
if (DigVal(c) >= 8) {
if (c < 0)
goto DOERR;
(void)ungetc(c, card);
return 0;
}
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
{ int i, n;
for (n = c-'0', i = 2; --i >= 0; n = (n<<3) + DigVal(c))
if (DigVal(c = getc(card)) >= 8) {
if (c < 0)
goto DOERR;
(void)ungetc(c, card);
break;
}
return (n & 255);
}
default:
if (!IsLayout(c))
return c;
c = getc(card);
goto BACK;
}
}
/* com0plain(card, endeol)
* These comments have the form
* <eolcom> <char>* <newline> {PUNCT}
* or <eolcom><eolcom> <char>* <newline> {SIGN }
* depending on the classification of <eolcom>. Note that we could
* handle ADA comments with no trouble at all. There was a Pop-2
* dialect which had end-of-line comments using "!" where the comment
* could also be terminated by "!". You could obtain the effect of
* including a "!" in the comment by doubling it, but what you had
* then was of course two comments. The endeol parameter of this
* function allows the handling of comments like that which can be
* terminated either by a new-line character or an <endeol>, whichever
* comes first. For ordinary purposes, endeol = -1 will do fine.
* When this is called, the initial <eolcom>s have been consumed.
* We return the first character after the comment.
* If the end of the source file is encountered, we do not treat it
* as an error, but quietly close the comment and return EOF as the
* "FOLLOWing" character.
*/
int com0plain(card, endeol)
register FILE *card; /* source file */
register int endeol; /* The closing character "!" */
{
register int c;
while ((c = getc(card)) >= 0 && c != '\n' && c != endeol)
;
if (c >= 0)
c = getc(card);
return c;
}
/* The states in the next two functions are
* 0 - after an uninteresting character
* 1 - after an "astcom"
* 2 - after a "begcom"
* Assuming begcom = "(", astom = "#", endcom = ")",
* com2plain will accept "(#)" as a complete comment. This can
* be changed by initialising the state to 0 rather than 1.
* The same is true of com2nest, which accepts "(#(#)#) as a
* complete comment. Changing it would be rather harder.
* Fixing the bug where the closing <astcom> is copied if it is
* not an asterisk may entail rejecting "(#)".
*/
/* com2plain(card, astcom, endcom)
* handles PL/I-style comments, that is, comments which begin with
* a pair of characters <begcom><astcom> and end with a pair of
* chracters <astcom><endcom>, where nesting is not allowed. For
* example, if we take begcom='(', astcom='*', endcom=')' as in
* Pascal, the comment "(* not a (* plain *)^ comment *) ends at
* the "^".
* For this kind of comment, it is perfectly sensible for any of
* the characters to be equal. For example, if all three of the
* bracket characters are "#", then "## stuff ##" is a comment.
* When this is called, the initial <begcom><astcom> has been consumed.
*/
void com2plain(card, astcom, endcom)
register FILE *card; /* source file */
int astcom; /* The asterisk character "*" */
int endcom; /* The closing character "/" */
{
register int c;
register int state;
for (state = 0; (c = getc(card)) >= 0; ) {
if (c == endcom && state)
break;
state = c == astcom;
}
if (c < 0)
SyntaxError(eofinrem);
}
int GetToken()
{
register FILE *card = curr_in;
register BYTE_PTR s = AtomStr;
register int c, d;
register int n = MaxStrLen;
LONG oldv = 0, newv = 0;
LONG_PTR newpair, list_head, stack_top;
c = lastc;
START:
switch (InType(c)) {
case DIGIT:
/* The FOLLOWing kinds of numbers exist:
* (1) unsigned decimal integers: d+
* (2) unsigned based integers: d+Ro+[R]
* (3) unsigned floats: d* [. d*] [e +/-] d+
* (4) characters: 0Rc[R]
* We allow underscores in numbers too, ignoring them.
*/
do {
if (c != '_')
*s++ = c;
c = getc(card);
} while (InType(c) <= BREAK);
if (c == intab.radix) {
*s = 0;
for (d = 0, s = AtomStr; c = *s++; ) {
d = d*10-'0'+c;
if (d > 36)
SyntaxError(badradix);
}
if (d == 0) { /* 0'c['] is a character code */
d = read_character(card, -1);
Sprintf(AtomStr, "%d", d);
d = getc(card);
lastc = d == intab.radix ? getc(card) : d;
return DIGIT;
}
while (c = getc(card), DigVal(c) < 99)
if (c != '_') {
oldv = newv;
newv = newv*d + DigVal(c);
if (newv < oldv || newv > SBPMAXINT) {
Fprintf(stderr, "*** overflow in radix notation ***\n");
double_v = oldv*1.0*d + DigVal(c);
while (c = getc(card), DigVal(c) < 99)
if (c != '_')
double_v = double_v*d + DigVal(c);
if (c == intab.radix)
c = getc(card);
lastc = c;
return RREAL;
}
}
/*
Sprintf(AtomStr, "%ld", newv);
*/
rad_int = newv;
if (c == intab.radix)
c = getc(card);
lastc = c;
return RDIGIT;
} else if (c == intab.dpoint) {
d = getc(card);
if (InType(d) == DIGIT) {
DECIMAL: *s++ = '.';
do {
if (d != '_')
*s++ = d;
d = getc(card);
} while (InType(d) <= BREAK);
if ((d | 32) == 'e') {
*s++ = 'E';
d = getc(card);
if (d == '-') {
*s++ = d;
d = getc(card);
} else if (d == '+')
d = getc(card);
if (InType(d) > BREAK)
SyntaxError(badexpt);
do {
if (d != '_')
*s++ = d;
d = getc(card);
} while (InType(d) <= BREAK);
}
c = d;
*s = 0;
lastc = c;
return REALO;
} else /* c has not changed */
ungetc(d, card);
}
*s = 0;
lastc = c;
return DIGIT;
case BREAK:
case UPPER:
do {
if (--n < 0)
SyntaxError(tok2long);
*s++ = c;
c = getc(card);
} while (InType(c) <= LOWER);
*s = 0;
lastc = c;
rtnint = (int)(s - AtomStr);
return UPPER;
case LOWER:
do {
if (--n < 0) SyntaxError(tok2long);
*s++ = c;
c = getc(card);
} while (InType(c) <= LOWER);
*s = 0;
SYMBOL: if (c == '(') {
lastc = getc(card);
rtnint = (int)(s - AtomStr);
return BEGIN;
} else {
lastc = c;
rtnint = (int)(s - AtomStr);
return LOWER;
}
case SIGN:
*s = c;
d = getc(card);
if (c == intab.begcom && d == intab.astcom) {
ASTCOM: com2plain(card, d, intab.endcom);
c = getc(card);
goto START;
} else if (c == intab.dpoint && InType(d) == DIGIT) {
*s++ = '0';
goto DECIMAL;
}
while (InType(d) == SIGN) {
if (--n == 0)
SyntaxError(tok2long);
*++s = d;
d = getc(card);
}
*++s = 0;
if (InType(d) >= SPACE && c == intab.termin && AtomStr[1] == 0) {
lastc = d;
return ENDCL; /* i.e. '.' FOLLOWed by layout */
}
c = d;
goto SYMBOL;
case NOBLE:
if (c == intab.termin) {
*s = 0;
lastc = ' ';
return ENDCL;
} else if (c == intab.eolcom) {
c = com0plain(card, intab.endeol);
goto START;
}
*s++ = c;
*s = 0;
lastc = c = getc(card);
goto SYMBOL;
case PUNCT:
if (c == intab.termin) {
*s = 0;
lastc = ' ';
return ENDCL;
} else if (c == intab.eolcom) {
c = com0plain(card, intab.endeol);
goto START;
}
d = getc(card);
if (c == intab.begcom && d == intab.astcom)
goto ASTCOM;
/* If we arrive here, c is an ordinary punctuation mark */
if (c == '(') /* need to distingusih between atom( and atom ( */
*s++ = ' ';
lastc = d;
*s++ = c;
*s = 0;
rtnint = (int)(s - AtomStr);
return PUNCT;
case CHRQT:
/* `c[`] is read as an integer.
* Eventually we should treat characters as a distinct
* token type, so they can be generated on output.
* If the character quote, atom quote, list quote,
* or string quote is the radix character, we should
* generate 0'x notation, otherwise `x`.
*/
d = read_character(card, -1);
Sprintf(AtomStr, "%d", d);
d = getc(card);
lastc = d == c ? getc(card) : d;
return DIGIT;
case ATMQT:
case STRQT:
while ((d = read_character(card, c)) >= 0) {
if (--n < 0) SyntaxError(tok2long);
*s++ = d;
}
*s = 0;
rtnint = (int) (s - AtomStr);
c = lastc;
goto SYMBOL;
case LISQT:
/* check for potential heap overflow */
/* (this will guarantee space for lists of up to 50 elements) */
stack_top = (breg < ereg) ? breg : ereg - ENV_SIZE(cpreg);
if (stack_top < hreg + 100) {
/* garbage_collection("GetToken"); */
if (stack_top < hreg + 100) /* still too full */
quit("Heap overflow\n");
}
list_head = newpair = hreg;
while ((d = read_character(card, c)) >= 0) {
hreg += 2;
*newpair++ = MAKEINT(d);
*newpair++ = (LONG)hreg | LIST_TAG;
}
if (list_head == hreg) /* null string */
list_p = nil_sym;
else {
*(--newpair) = nil_sym;
list_p = (LONG)list_head | LIST_TAG;
}
return LISQT;
case EOLN:
case SPACE:
c = getc(card);
goto START;
case EOFCH:
clearerr(curr_in);
return EOFCH;
}
Fprintf(stderr, "Internal error: InType(%d)==%d\n", c, InType(c));
abort(); /* There is no way we can get here */
/*NOTREACHED*/
}
void b_NEXT_TOKEN()
{
register LONG op;
register LONG_PTR top;
register FILE *card = curr_in;
int i, atoi(), oldnum, newnum;
int len;
double atof();
LONG makefloat(), ptr;
i = GetToken();
switch (i) {
case LOWER:
op = reg[1]; DEREF(op); FOLLOW(op) = MAKEINT(ATOMO);
ptr = (LONG)insert(AtomStr, rtnint, 0, &perm) | CS_TAG;
op = reg[2]; DEREF(op); FOLLOW(op) = ptr;
cATOMO++;
break;
case BEGIN:
op = reg[1]; DEREF(op); FOLLOW(op) = MAKEINT(FUNC);
ptr = (LONG)insert(AtomStr, rtnint, 0, &perm) | CS_TAG;
op = reg[2]; DEREF(op); FOLLOW(op) = ptr;
cFUNC++;
break;
case UPPER:
if (AtomStr[0] == '_' && AtomStr[1] == 0) {
op = reg[1]; DEREF(op); FOLLOW(op) = MAKEINT(USCORE);
cUSCORE++;
} else {
op = reg[1]; DEREF(op); FOLLOW(op) = MAKEINT(VARO);
cVARO++;
}
if (rtnint > 256) {
AtomStr[256] = 0;
rtnint = 256;
Fprintf(stderr, "*** Name of constant too long: %s\n", AtomStr);
}
ptr = (LONG)insert(AtomStr, rtnint, 0, &perm) | CS_TAG;
op = reg[2]; DEREF(op); FOLLOW(op) = ptr;
break;
case REALO:
op = reg[2]; DEREF(op);
FOLLOW(op) = makefloat(atof(AtomStr));
op = reg[1]; DEREF(op); FOLLOW(op) = MAKEINT(NUMBERO);
cNUMERO++;
break;
case RREAL:
op = reg[2]; DEREF(op); FOLLOW(op) = makefloat(double_v);
op = reg[1]; DEREF(op); FOLLOW(op) = MAKEINT(NUMBERO);
cNUMERO++;
break;
case RDIGIT:
cNUMERO++;
op = reg[1]; DEREF(op); FOLLOW(op) = MAKEINT(NUMBERO);
op = reg[2]; DEREF(op); FOLLOW(op) = MAKEINT(rad_int);
break;
case DIGIT:
cNUMERO++;
op = reg[1]; DEREF(op); FOLLOW(op) = MAKEINT(NUMBERO);
op = reg[2]; DEREF(op);
for (len = oldnum = newnum = 0; AtomStr[len] != 0; len++) {
oldnum = newnum;
newnum = newnum * 10 + DigVal(AtomStr[len]);
if (newnum < oldnum || newnum > SBPMAXINT) {
Fprintf(stderr, "*** overflow >> %s\n", AtomStr);
len = strlen(AtomStr);
AtomStr[len++] = '.';
AtomStr[len++] = '0';
AtomStr[len] = 0;
FOLLOW(op) = makefloat(atof(AtomStr));
return;
}
}
FOLLOW(op) = MAKEINT(newnum);
break;
case LISQT:
op = reg[1]; DEREF(op); FOLLOW(op) = MAKEINT(STRING);
op = reg[2]; DEREF(op); FOLLOW(op) = list_p;
cSTRING++;
break;
case PUNCT:
/* there are nine punctuation marks, */
/* ( , ) [ | ] { ; } */
/* % is listed as one, but isn't really. */
if (AtomStr[0] == ';') {
op = reg[1]; DEREF(op); FOLLOW(op) = MAKEINT(SEMI);
cSEMI++;
} else {
op = reg[1]; DEREF(op); FOLLOW(op) = MAKEINT(SPECIAL);
cSPECIAL++;
ptr = (LONG)insert(AtomStr, rtnint, 0, &perm) | CS_TAG;
op = reg[2]; DEREF(op); FOLLOW(op) = ptr;
}
break;
case ENDCL:
op = reg[1]; DEREF(op); FOLLOW(op) = MAKEINT(ENDCLS);
cENDCLS++;
break;
case EOFCH:
op = reg[1]; DEREF(op); FOLLOW(op) = MAKEINT(BADEND);
break;
default:
Fprintf(stderr, "Internal error %d %s\n", i, AtomStr);
}
}